Chasing Peaks

Author

Adam Kemberling

Published

November 22, 2024

Code
# Libraries
library(tidyverse)
library(gmRi)
library(scales)
library(gt)
library(patchwork)
library(gtsummary)
library(gt)
library(sizeSpectra)
library(gganimate)
library(glue)
conflicted::conflict_prefer("select", "dplyr")
conflicted::conflict_prefer("filter", "dplyr")

# Processing functions
source(here::here("Code/R/Processing_Functions.R"))

# ggplot theme
theme_set(
  theme_gmri(
    axis.line.y = element_line(),
    axis.ticks.y = element_line(), 
    rect = element_rect(fill = "white", color = NA),
    panel.grid.major.y = element_blank(),
    strip.text.y = element_text(angle  = 0),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8),
    legend.position = "bottom"))



# labels for factor levels
area_levels <- c("GoM", "GB", "SNE", "MAB")
area_levels_long <- c("Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight")
area_levels_all <- c("Northeast Shelf", area_levels)
area_levels_long_all <- c("Northeast Shelf", area_levels_long)

# table to join for swapping shorthand for long-hand names
area_df <- data.frame(
  area = c("Scotian Shelf", "Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight", "Northeast Shelf"),
  survey_area = c("SS", "GoM", "GB", "SNE", "MAB", "Northeast Shelf"),
  area_titles = c("Scotian Shelf", "Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight", "Northeast Shelf"))


# Degree symbol
deg_c <- "\u00b0C"

Determining Spectra Minimum Sizes

Size spectra typically focus on the descending slope of the size distribution. This doc covers how that minima is selected and the process for doing so, and then the consequence of shifting the minima of the bounded distribution.

Code
# Data used for Wigley estimates
wigley_in <- read_csv(here::here("Data/processed/wigley_species_trawl_data.csv")) %>% left_join(area_df)

The general idea is to bin the individual abundances by log2 size-bins, then determine the peak.

The following functions will aid in the labeling of log2 bins:

Code
# Broad Distribution
#log2 bins for easy-of-access
#' @title Build Log 2 Bin Structure Dataframe
#' 
#' @description Used to build a dataframe containing equally spaced log2 bins for
#' size spectra analysis. Contains details on the left and right limits, midpoint, bin width, 
#' and a text label for the bins. log2bin number ascends with increasing size for eeasy plotting.
#'
#' @param log2_min 
#' @param log2_max 
#' @param log2_increment 
#'
#' @return
#' @export
#'
#' @examples
define_log2_bins <- function(log2_min = 0, log2_max = 13, log2_increment = 1){
  
  # How many bins
  n_bins  <- length(seq(log2_max, log2_min + log2_increment, by = -log2_increment))
  
  # Build Equally spaced log2 bin df
  log2_bin_structure <- data.frame(
    "log2_bins" = as.character(seq(n_bins, 1, by = -1)),
    "left_lim"  = seq(log2_max - log2_increment, log2_min, by = -log2_increment),
    "right_lim" = seq(log2_max, log2_min + log2_increment, by = -log2_increment)) %>% 
    mutate(
      bin_label    = str_c(round(2^left_lim, 3), " - ", round(2^right_lim, 3), "g"),
      bin_width    = 2^right_lim - 2^left_lim,
      bin_midpoint = (2^right_lim + 2^left_lim) / 2) %>% 
    arrange(left_lim)
  
  return(log2_bin_structure)
}






#' @title Assign Manual log2 Bodymass Bins - By weight
#'
#' @description Manually assign log2 bins based on individual length-weight bodymass 
#' in increments of 1 on the log2 scale. Returns data with bins assigned based on individual
#' length-weight biomass
#' 
#' Uses maximum weight, and its corresponding bin as the limit.
#'
#' @param wmin_grams Catch data prepared for mle calculation, use prep_wmin_wmax
#' @param log2_increment Equally spaced increments to use for log 2 bin sizes. Default = 0.5.
#'
#' @return
#' @export
#'
#' @examples
assign_log2_bins <- function(wmin_grams, log2_increment = 1){
  
  
  #### 1. Set up bodymass bins
  
  # filter missing weights
  size_data <- wmin_grams %>% 
    filter(wmin_g > 0,
           is.na(wmin_g) == FALSE,
           wmax_g > 0,
           is.na(wmax_g) == FALSE)
  
  # Get bodymass on log2() scale
  size_data$log2_weight <- log2(size_data$ind_weight_g)
  
  # Set up the bins - Pull min and max weights from data available
  #min_bin <- floor(min(size_data$log2_weight))
  min_bin <- 0
  max_bin <- ceiling(max(size_data$log2_weight))
  
  
  # Build a bin key, could be used to clean up the incremental assignment or for apply style functions
  log2_bin_structure <- define_log2_bins(
    log2_min = min_bin, 
    log2_max = max_bin, 
    log2_increment = log2_increment)
  
  
  
  # Loop through bins to assign the bin details to the data
  log2_assigned <- log2_bin_structure %>%
    split(.$log2_bins) %>%
    map_dfr(function(log2_bin){
      
      # limits and labels
      l_lim   <- log2_bin$left_lim
      r_lim   <- log2_bin$right_lim
      bin_num <- as.character(log2_bin$log2_bin)
      
      # assign the label to the appropriate bodymasses
      size_data %>% mutate(
        log2_bins = ifelse( between(log2_weight, l_lim, r_lim), bin_num, NA),
        log2_bins = as.character(log2_bins)) %>%
        drop_na(log2_bins)
      
    })
  
  # Join in the size bins
  log2_assigned <- left_join(log2_assigned, log2_bin_structure, by = "log2_bins")
  
  # return the data with the bins assigned
  return(log2_assigned)
  
}








#' @title Calculate Normalized and De-Normalized Abundances
#'
#' @description For binned size spectra estimation we use the stratified abundance divided by the
#' bin widths (normalized size spectra). Another way to present the data is to de-normalize, which 
#' takes those values and multiplies them by the mid-point of the log-scale bins.
#' 
#' min/max & bin_increments are used to enforce the presence of a size bin in the event that 
#' there is no abundance. This is done for comparing across different groups/areas that should 
#' conceivably have the same size range sampled.
#'
#' @param log2_assigned size data containing the bin assignments to use
#' @param min_log2_bin Minimum 2^x value for the size spectra being measured (>=)
#' @param max_log2_bin Maximum 2^x value for the size spectra being measured (<)
#' @param bin_increment The bin-width on log scale that separates each bin
#' @param ... Additional grouping factors with which to aggregate on besides the size bins themselves
#'
#' @return
#' @export
#'
#' @examples
aggregate_log2_bins <- function(
    log2_assigned, 
    min_log2_bin = 0, 
    max_log2_bin = 13, 
    bin_increment = 1,
    ...){
  
  # Full Possible Bin Structure
  # Fills in any gaps
  log2_bin_structure <- define_log2_bins(
    log2_min       = min_log2_bin, 
    log2_max       = max_log2_bin, 
    log2_increment = bin_increment)
  
  
  # Capture all the group levels with a cheeky expand()
  if(missing(...) == FALSE){
    log2_bin_structure <- log2_bin_structure %>% 
      tidyr::expand(left_lim, distinct(log2_assigned, ...)) %>% 
      full_join(log2_bin_structure)
  }
  
  
  
  # Get bin breaks
  log2_breaks <- sort(
    unique(c(log2_bin_structure$left_lim, log2_bin_structure$right_lim)))
  
  
  # Get Totals for bodymass and abundances
  log2_aggregates <- log2_assigned %>% 
    group_by(left_lim, ...) %>% 
    summarise(abundance   = sum(numlen_adj, na.rm = T),
              weight_g    = sum(wmin_g, na.rm = T),
              .groups = "drop")
  
  
  # Join back in what the limits and labels are
  # The defined bins and their labels enforce the size limits
  log2_prepped <- left_join(
    x = log2_bin_structure, 
    y = log2_aggregates)
  
  
  #### Fill Gaps with Zero's?? 
  # This ensures that any size bin that is intended to be in use is actually used
  log2_prepped <- log2_prepped %>% 
    mutate(
      abundance = ifelse(is.na(abundance), 1, abundance),
      weight_g = ifelse(is.na(weight_g), 1, weight_g))
  
  
  #### normalize abundances using the bin widths
  log2_prepped <- log2_prepped %>% 
    mutate(
      normalized_abund = abundance / bin_width,
      normalized_biom = weight_g / bin_width,
      # # Remove Bins Where Normalized Biomass < 0? No!
      # normalized_abund = ifelse(normalized_abund < 2^0, NA, normalized_abund),
      # norm_strat_abund = ifelse(norm_strat_abund < 2^0, NA, norm_strat_abund)
    )
  
  # Add de-normalized abundances (abundance * bin midpoint)
  log2_prepped <- log2_prepped %>% 
    mutate(
      denorm_abund = normalized_abund * bin_midpoint,
      denorm_biom = normalized_biom * bin_midpoint)
  
  # Return the aggregations
  return(log2_prepped)
  
}
Code
# Assign l2 bins
wig_l2 <- assign_log2_bins(wigley_in, log2_increment = 1)

# Create Shelf aggregate
wig_l2 <- wig_l2 %>% 
  mutate(area = "Northeast Shelf", 
         survey_area = "Northeast Shelf") %>% 
  bind_rows(wig_l2) %>% 
  mutate(area = factor(area, levels = area_levels_long_all),
         season = factor(season, levels = c("Spring", "Fall")))

From there, we can use one group as an example. In this plot peak normalized abundance is at the 2-4g bin.

This is not the most straightforward example because its multi-modal, but this will happen so it is instructive to use.

This example also highlights the common situation where normalized abundance approaches 0, which I’m still unsure how much that matters.

Code
# Group details
glabs <- list(
  "yr" = 2000,
  "area" = "MAB",
  "season" = "Spring"
)

# Filter that out
test_case <- wig_l2 %>% 
  filter(
    est_year == glabs$yr, 
    survey_area == glabs$area, 
    season == glabs$season)



# Plot the thing - cheater way
ggplot(test_case) +
  geom_histogram(
    aes(
      x = wmin_g, 
      weight = (numlen_adj/bin_width),
      color = after_stat(count) == max(after_stat(count))), 
    breaks = 2^c(0:15), 
    linewidth = 1) +
  scale_color_manual(values = c("white", "orange")) +
  scale_y_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(-12,10, 2)) +
  scale_x_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^c(0:15)) +
  labs(
    title = "Group Details:", 
    subtitle = str_c("Area: ", glabs$area,"\nSeason: ", glabs$season, "\nYear: ", glabs$yr ),
    y = "Normalized Abundance",
    x = "Bodyweight (g)")

I can’t auto-locate the tallest bin within all of that geom_histogram plot code, so it will be easier to do it outside and animate them using geom_col.

Here are all of them as an animation:

Code
# Get the plotting summaries
wig_l2_aggs <- wig_l2 %>%
  unite("groupvar", area, season, est_year, sep = "XX") %>%
  split(.$groupvar) %>%
  map_dfr(function(x){

    # Get aggregates
    l2_aggs <- aggregate_log2_bins(x, bin_increment = 1)

    # Locate Tallest
    tallest <- l2_aggs %>%
      arrange(desc(normalized_abund)) %>%
      slice(1) %>%
      pull(left_lim)

    # Add info back to aggregates
    l2_aggs <- l2_aggs %>%
      mutate(is_tallest = if_else(left_lim == tallest, T, F))

    # Return
    return(l2_aggs)}, .id = "groupvar")  %>%
  separate(groupvar, into = c("area", "season", "est_year"), sep = "XX") %>%
  mutate(
    area = factor(area, levels = area_levels_long_all),
    season = factor(season, levels = c("Spring", "Fall")))
Code
# # ggplot with animation
# animated_plot <- ggplot(wig_l2_aggs) +
#   geom_col(
#     aes(
#       x = left_lim,
#       y = normalized_abund,
#       fill = season,
#       color = is_tallest),
#     alpha = 0.6,
#     linewidth = 1.5) +
#   scale_y_continuous(
#     trans = transform_log(base = 2),
#     labels = label_log(base = 2),
#     breaks = 2^seq(-10,16, 2)) +
#   scale_x_continuous(
#     labels = label_math(expr = 10^.x),
#     breaks = c(0:15)) +
#   scale_fill_gmri() +
#   scale_color_manual(values = c("transparent", "black")) +
#   facet_grid(area ~ season, scale = "free") +
#   labs(
#     y = "Normalized Abundance",
#     x = "Bodyweight (g)",
#     title = 'Year: {closest_state}', # Dynamic title for year
#     fill = "Season") +
#   transition_states(
#     est_year,
#     transition_length = 1,
#     state_length = 5) +
#   ease_aes('linear')  # Smooth transition
# 
# # Animate and save
# animate(animated_plot, nframes = 300, fps = 10, width = 1000, height = 800)
# anim_save(here::here("faceted_tallest_bin_spectra_wigley.gif"), animated_plot)



# Show the GIF
knitr::include_graphics(here::here("faceted_tallest_bin_spectra_wigley.gif"))

Bodymass spectra abundance peaks

Looking at Individual Years w/ Observable

I like having the ability to look at them one-by-one, but don’t want to make 800 plots or tabs. I’m using this exercise as an excuse to use observable for interactive selections.

Code
# Reformat years as a date - messes with slider
# year_avgs <- mutate(year_avgs, est_year = as.Date(str_c(est_year, "-06-01")))

# Define data to use for js
ojs_define(
  data = wig_l2_aggs %>% arrange(est_year))
  # data = filter(wig_l2_aggs, normalized_abund > 1) %>% 
  #   arrange(est_year))
Code
viewof yearSlider = Inputs.range(
  [1970, 2019], // If not pulling from the data
  {
  label: "Year:",
  value: 1970, // If not pulling from the data
  step: 1}                    // Step size for numeric slider
)

// Create selections for season and area
//viewof seasonSelect = Inputs.select(["Spring", "Fall"], { label: "Season" })
viewof areaSelect = Inputs.select(["Northeast Shelf", "Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight"], { label: "Area" })


// Filtering Function
filteredData = transpose(data).filter(function(d) {
  return yearSlider == d.est_year && d.area === areaSelect;
})
Code
// Plot with Facets
Plot.plot({
  facet: {
    data: filteredData,
    x: d => d.area, // Columns: area
    y: d => d.season  // Rows: season
    //y: { field: "season", domain: ["Spring", "Fall"] }  // Custom row order - doesn't work
  },
  marks: [
    Plot.barY(filteredData, { 
    x: "left_lim", 
    y: "normalized_abund",
    fill: "season",
    //stroke: "season",
    stroke: d => d.is_tallest ? "black" : "none", // Change stroke color for highlighted bars
    strokeWidth: d => d.is_tallest ? 4 : 0 // Thicker stroke for highlighted bars
    })
  ],
  x: {
    label: "Body Mass (g)",
    tickFormat: d => `2^${d}` // Format labels as powers of 2
  },
  y: {
   label: "Abundance",
   type: "symlog",
   base: 2
  },
  style: {
    facetPadding: 5
  }
})